# NOTE(review): removes every object from the global environment; do not run
# this script in a session that holds unsaved work.
rm(list=ls()) 

###############################################################################
# ZONEH PROCESSING SCRIPT
## creates the following data file:
## - zoneh_year_data.RData
###############################################################################

# load packages
library(countrycode)   # countrycode(): country name -> numeric country code
library(peacesciencer) # NOTE(review): not referenced in the visible code
library(lubridate)     # NOTE(review): not referenced in the visible code
library(tidyverse)     # separate() is used throughout

# set the working directory; every relative path below ("raw_data/...",
# "data/...") is resolved against it.
# NOTE(review): machine-specific absolute path -- edit before running elsewhere.
setwd("/Users/cbj4/Desktop/replication")

##############################
# STEP 1: DATA LOAD AND CLEAN
##############################

# load the scraped data; the code below expects this file to supply an object
# named zoneh_data_clean (presumably one row per recorded attack -- confirm)
load("raw_data/zoneh_raw.RData")

####################################
# STEP 2: CREATE ATTACK COUNTS
####################################

# split the "/"-delimited date string into year, month and day columns
zoneh_data_clean <- zoneh_data_clean %>%
  separate(date, into = c("year", "month", "day"), sep = "/")

# translate country names into numeric country codes ...
zoneh_data_clean$ccode <- countrycode(
  zoneh_data_clean$country,
  origin = "country.name",
  destination = "cown"
)
# ... then hand-code the two spellings that are overridden explicitly
zoneh_data_clean$ccode <- with(
  zoneh_data_clean,
  ifelse(country == "Phillipines ", 840, ccode)
)
zoneh_data_clean$ccode <- with(
  zoneh_data_clean,
  ifelse(country == "Serbia", 345, ccode)
)

# key identifying one country-group-year combination
zoneh_data_clean$attacker_year <- with(
  zoneh_data_clean,
  paste(ccode, group_id, year, sep = "_")
)
length(unique(zoneh_data_clean$attacker_year))

# summing a column of ones per key gives the number of rows (attacks) per key
zoneh_data_clean$attack_bin <- 1
unique_attacker_year <- with(
  zoneh_data_clean,
  aggregate(attack_bin, list(attacker_year), sum)
) %>%
  separate(col = Group.1, into = c("ccode", "group_id", "year"), sep = "_")

# country-group identifier, used later to build the balanced panel
unique_attacker_year$ccode_group <- with(
  unique_attacker_year,
  paste(ccode, group_id, sep = "_")
)

# the fourth column holds the per-key sums returned by aggregate()
colnames(unique_attacker_year)[4] <- "attacks"

#########################################################
# STEP 3: CREATE BASE COUNTRY-YEAR WITH COUNT ATTACK DATA
#########################################################

# One template row per calendar year in the 2000-2022 window.
year_template <- data.frame(year = 2000:2022)

# Years remaining until the next election year. Election years are 2002,
# 2006, ..., 2022 and are coded 0; the year before an election is 1, two
# years before is 2, three years before is 3.
year_template$time_to_elect <- (2022 - year_template$year) %% 4

# dummy variable for each time-to-election category (0-3)
for (k in 0:3) {
  year_template[[paste0("T_", k)]] <-
    ifelse(year_template$time_to_elect == k, 1, 0)
}

# linear time trend: 2000 -> 1, 2001 -> 2, ...
year_template$time <- (year_template$year + 1) - 2000

# Repeat the template once per country-group so every group gets the full set
# of years. Building it in one step avoids growing a list inside a loop and
# also yields an empty (zero-row) panel when there are no groups.
ccode_groups <- unique(unique_attacker_year$ccode_group)
n_years <- nrow(year_template)
group_base_data <- year_template[
  rep(seq_len(n_years), times = length(ccode_groups)), ,
  drop = FALSE
]
group_base_data$ccode_group <- rep(ccode_groups, each = n_years)
rownames(group_base_data) <- NULL

group_base_data <- separate(group_base_data, col = ccode_group,
                            into = c("ccode", "group_id"), sep = "_")

# merge base with attack counts: put the keys on a common numeric type first
# NOTE(review): non-numeric ccode/group_id strings (e.g. "NA") become NA here
group_base_data$ccode <- as.numeric(group_base_data$ccode)
group_base_data$group_id <- as.numeric(group_base_data$group_id)
group_base_data$year <- as.numeric(group_base_data$year)

unique_attacker_year$ccode <- as.numeric(unique_attacker_year$ccode)
unique_attacker_year$group_id <- as.numeric(unique_attacker_year$group_id)
unique_attacker_year$year <- as.numeric(unique_attacker_year$year)

# Keep ccode as part of the join key. Joining on group_id and year alone
# would attach another country's counts (and duplicate rows) whenever the
# same group_id appears under more than one ccode.
unique_attacker_year <-
  unique_attacker_year[, c("ccode", "group_id", "year", "attacks")]

group_year_attacks <- merge(group_base_data, unique_attacker_year,
                            by = c("ccode", "group_id", "year"),
                            all.x = TRUE)

# group-years with no recorded attack have no match in the counts: code as 0
group_year_attacks$attacks[is.na(group_year_attacks$attacks)] <- 0

#######################################
# STEP 4: CREATE ITU CANDIDATE TREATMENT
#######################################

# read in ITU council and ITU radio board data
ITU <- read.csv("raw_data/ITU.csv")
RB_ITU <- read.csv("raw_data/RB_ITU.csv")

# rename radio-board columns 1 and 4 so they can be stacked with the council
# file on the shared "state" and "elect_year" names
names(RB_ITU)[c(1, 4)] <- c("state", "elect_year")

# start with elect year 2002: keep only elections held after 2000
ITU <- ITU[which(ITU$elect_year > 2000), , drop = FALSE]
RB_ITU <- RB_ITU[which(RB_ITU$elect_year > 2000), , drop = FALSE]
ITU_merge <- rbind(ITU, RB_ITU)

# add ccodes to ITU data; names that cannot be matched are coded 0
ITU_merge$ccode <- countrycode(
  ITU_merge$state,
  origin = "country.name",
  destination = "cown",
  nomatch = 0
)

# add ccodes for states that are set by hand
ITU_merge$ccode <- with(
  ITU_merge,
  ifelse(state == "United  States", 2, ccode)
)
ITU_merge$ccode <- with(
  ITU_merge,
  ifelse(state == "Serbia", 345, ccode)
)

# create unique ITU candidate years: tabulating the pasted key collapses
# repeated country / election-year pairs into one row each
ITU_merge$ccode_year <- with(ITU_merge, paste(ccode, elect_year, sep = "_"))
unique_ITU_year <- as.data.frame(table(ITU_merge$ccode_year)) %>%
  separate(col = Var1, into = c("ccode", "year"), sep = "_")

# drop the frequency column and flag every remaining row as a candidate year
unique_ITU_year <- unique_ITU_year[, c(1, 2)]
unique_ITU_year$candidate <- 1

# year comes back from separate() as text; the lead-year expansion that
# follows does arithmetic on it, so convert to numeric
unique_ITU_year$year <- as.numeric(unique_ITU_year$year)

# Build the country-year rows for the year immediately before each election.
#
# y: vector of country codes, one per candidate election.
# x: numeric vector of election years, same length as y.
# Returns a data frame with columns "ccode" (y unchanged) and "year" (x - 1).
#
# The data frame is built directly; the earlier version routed both vectors
# through print(), which dumped them to the console on every call.
year_subtract1 <- function(y, x) {
  data.frame(ccode = y, year = x - 1)
}

# candidate rows for one year before each election
df1 <- with(unique_ITU_year, year_subtract1(ccode, year))
df1[["candidate"]] <- 1

# Build the country-year rows for two years before each election.
#
# y: vector of country codes, one per candidate election.
# x: numeric vector of election years, same length as y.
# Returns a data frame with columns "ccode" (y unchanged) and "year" (x - 2).
#
# The data frame is built directly; the earlier version routed both vectors
# through print(), which dumped them to the console on every call.
year_subtract2 <- function(y, x) {
  data.frame(ccode = y, year = x - 2)
}

# candidate rows for two years before each election
df2 <- with(unique_ITU_year, year_subtract2(ccode, year))
df2[["candidate"]] <- 1

# Build the country-year rows for three years before each election.
#
# y: vector of country codes, one per candidate election.
# x: numeric vector of election years, same length as y.
# Returns a data frame with columns "ccode" (y unchanged) and "year" (x - 3).
#
# The data frame is built directly; the earlier version routed both vectors
# through print(), which dumped them to the console on every call.
year_subtract3 <- function(y, x) {
  data.frame(ccode = y, year = x - 3)
}

# candidate rows for three years before each election
df3 <- with(unique_ITU_year, year_subtract3(ccode, year))
df3[["candidate"]] <- 1

# combine the election years and the three lead-up years into one candidate
# table and exclude years prior to 2000
SY_ITU_dat <- rbind(unique_ITU_year, df1, df2, df3)
SY_ITU_dat <- subset(SY_ITU_dat, year > 1999)

# guard: a country-year that appears more than once here would duplicate
# every matching group-year row in the merge below
SY_ITU_dat <- unique(SY_ITU_dat)

# merge ITU candidate status into the group-year attack base data;
# group-years with no candidate row are coded candidate = 0
group_year_attacks <- merge(group_year_attacks, SY_ITU_dat,
                            by = c("ccode", "year"), all.x = TRUE)
group_year_attacks$candidate[is.na(group_year_attacks$candidate)] <- 0

# create spell factors for within analysis

# Bin the (integer-valued) time index into six consecutive spells:
# time < 4 -> 1, 4-7 -> 2, 8-11 -> 3, 12-15 -> 4, 16-19 -> 5, > 19 -> 6.
spell_starts <- c(4, 8, 12, 16, 20)
group_year_attacks$spell <-
  findInterval(group_year_attacks$time, spell_starts) + 1

# country-by-spell identifier, stored as a factor
group_year_attacks$country_spell <- with(
  group_year_attacks,
  factor(paste(ccode, spell, sep = "_"))
)

# group-by-spell identifier, stored as a factor
group_year_attacks$group_spell <- with(
  group_year_attacks,
  factor(paste(group_id, spell, sep = "_"))
)

# make observations NA for units that could not be measured prior to a
# certain year
#
# Names are group_id values; each value is the last year (inclusive) for which
# that group's attack count is blanked, counting forward from 2000.
#
# This table replaces a long run of hand-copied ifelse() statements in which
# several lines tested the wrong group_id (101 in the 110 section; 103 in the
# 112, 116, 119, 136 and 93 sections), so 110/2001 and the year 2002 for
# groups 112, 116, 119, 136 and 93 were never set to NA. Every listed group
# now gets its complete range of years.
last_unmeasured_year <- c(
  "101" = 2014, "110" = 2014, "103" = 2014, "112" = 2014,
  "116" = 2014, "119" = 2014, "136" = 2010, "93" = 2018
)

# NOTE(review): 2005 was left out for every group in the hand-written version.
# That omission is preserved here; confirm whether it is intentional, and set
# this to integer(0) if 2005 should be blanked as well.
years_left_untouched <- 2005

for (gid in names(last_unmeasured_year)) {
  blank_years <- setdiff(2000:last_unmeasured_year[[gid]],
                         years_left_untouched)
  blank_rows <- group_year_attacks$group_id == as.numeric(gid) &
    group_year_attacks$year %in% blank_years
  # ifelse() is kept (rather than indexed assignment) so that rows with a
  # missing group_id are handled the same way as before
  group_year_attacks$attacks <- ifelse(blank_rows, NA,
                                       group_year_attacks$attacks)
}
# candidate-only subset of the group-year panel
SY_within <- group_year_attacks[which(group_year_attacks$candidate == 1), ]

# election indicator: 1 when a candidate group-year falls in an election year
# (T_0 == 1), 0 otherwise
group_year_attacks$election <- with(
  group_year_attacks,
  as.numeric(candidate == 1 & T_0 == 1)
)
# every row of SY_within is already a candidate row, so T_0 alone decides
SY_within$election <- as.numeric(SY_within$T_0 == 1)

length(unique(SY_within$group_id)) # 74 groups. Some drop out when subsetting to candidates.

#######################################################################
# STEP 5: CREATE FINAL "zoneh_year_data.RData" DATA SAVE AS RData
#######################################################################

# final analysis data: candidate group-years only (election column included)
candidate_rows <- which(group_year_attacks$candidate == 1)
zoneh_year_data <- group_year_attacks[candidate_rows, ]

# written relative to the working directory
save(zoneh_year_data, file = "data/zoneh_year_data.RData")

#######################################
# STEP 6: CHECK MODEL 1 and 2
#######################################

# glm.nb() is provided by the MASS package, which this script never attaches.
# It is called with an explicit namespace so the models run without
# library(MASS), which would also mask dplyr::select().

# negative binomial model of attack counts on the election indicator with
# group-spell fixed effects, candidate group-years only
LM3 <- MASS::glm.nb(attacks ~ election + factor(group_spell),
                    data = zoneh_year_data, link = log,
                    control = glm.control(maxit = 50))
summary(LM3)

# same specification estimated on the full group-year panel
LM3_full <- MASS::glm.nb(attacks ~ election + factor(group_spell),
                         data = group_year_attacks, link = log,
                         control = glm.control(maxit = 50))
summary(LM3_full)


